load data
USE_DC <- FALSE
text = read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/EMLLM/info/ia_label_mapping_opt_surprisal.csv')
if (USE_DC){eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_recomputed/'
} else {eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_nodc/'
}
file_pat = '_reading_N400_stats\\.csv$'
files=list.files(path = eeg_dir, pattern= file_pat)
Loop over participants
all<-c()
i<-0
for (f in files){
i=i+1
df <-read.csv(paste0(eeg_dir,f))
p <- strsplit(f,'_')
pID <- paste(p[[1]][1],p[[1]][2],sep='_')
df$pID <- pID
# df<- df %>% dplyr::select(c("pID", "type","latency","fixDur","task","identifier" , "page_fixation_ix","IA_ID","n400_magnitude_CPz","n400_latency_CPz" ))
df<- df %>% filter(task=='reading') %>% droplevels()
all[[i]] <- df
}
df <- do.call(rbind, all)
# Merge with text info for surprisal values
df <- df %>% merge(text, on=c("IA_ID","identifier"))
df <- df %>% rename(surprisalText=opt.125m_surprisal_wholetext, surprisalPage = opt.125m_surprisal_page,fixDur=duration)
# drop rows with no IA
df <- drop_na(df, "IA_LABEL")
# drop rows with punc
df <- df %>% filter(! IA_LABEL %in% c('.',',','-','--',';',':',"'",'?','!'))
feats <- grep("n400|fixDur",colnames(df), value=T)
feats_eeg <- grep("n400",colnames(df), value=T)
plots
df_long <- df %>% pivot_longer(all_of(c(feats,'logFixDur')), names_to="feature",values_to="value")
df_long <- df_long %>% mutate(channel=str_match(feature, 'n400_\\w*_([[:alnum:]]{2,5})$')[,2],
feature_type=str_match(feature, '(n400_\\w*)_[[:alnum:]]{2,5}$')[,2])
# compute lower and upper whiskers for each group
ylims <- df_long %>%
group_by(feature_type) %>%
summarise(Q1 = quantile(value, 1/100), Q3 = quantile(value, 99/100)) %>%
ungroup()
p1<-ggplot(df_long %>% filter(feature_type=='n400_mean')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_mean')
p2<-ggplot(df_long %>% filter(feature_type=='n400_min_magnitude')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_min')
p3<-ggplot(df_long %>% filter(feature_type=='n400_max_magnitude')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_max')
grid.arrange(p1,p2,p3)

ggplot(df)+
geom_density(aes(x=fixDur))+
scale_x_log10()

ggplot(df)+
geom_density(aes(x=surprisalText))+
scale_x_log10()

ggplot(df_long)+
geom_violin(aes(x=value,y=factor(channel), fill=factor(channel),outliers = FALSE), alpha=0.3)+
facet_wrap(~feature_type, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_mean'))+
geom_point(aes(x=surprisalText,y=value, color=factor(channel)), alpha=0.05, size=0.5)+
facet_wrap(~channel, scales="free")

repeat some plots after transformatiions
ggplot(df,aes(x=logSurprisalText, y=logFixDur)) +
geom_point(size=1, alpha=0.01)+stat_cor(method="pearson")

ggplot(df_long %>% filter(feature_type =='n400_mean'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_max_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_min_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

LME stats
predict EEG from LOG surprisal and fixDur
m.l.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
tab_model(m.l.m, m.l.min, m.l.max, m.l.zc, show.ci=F)
|
|
n 400 mean C Pz
|
n 400 max magnitude C Pz
|
n 400 min magnitude C Pz
|
n 400 zero crossings C Pz
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
17.14
|
<0.001
|
95.98
|
<0.001
|
-64.58
|
<0.001
|
1.93
|
<0.001
|
|
logFixDur
|
-2.85
|
<0.001
|
-3.65
|
<0.001
|
-1.86
|
0.014
|
0.03
|
0.049
|
|
logSurprisalText
|
-0.91
|
<0.001
|
-1.00
|
<0.001
|
-0.74
|
0.004
|
0.00
|
0.888
|
|
Random Effects
|
|
σ2
|
10397.76
|
10716.54
|
12209.36
|
4.91
|
|
τ00
|
98.97 pID
|
908.07 pID
|
702.36 pID
|
0.51 pID
|
|
|
26.39 identifier
|
29.42 identifier
|
31.20 identifier
|
0.02 identifier
|
|
ICC
|
0.01
|
0.08
|
0.06
|
0.10
|
|
N
|
50 identifier
|
50 identifier
|
50 identifier
|
50 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
104272
|
104272
|
104272
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.012
|
0.000 / 0.081
|
0.000 / 0.057
|
0.000 / 0.098
|
dotplot(ranef(m.l.m))
## $pID

##
## $identifier

predict fixDur from surprisal
m.d <- lmer(fixDur ~ (1|identifier) + (1|pID) + logSurprisalText, data=df)
tab_model(m.d)
|
|
fix Dur
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
208.40
|
202.58 – 214.22
|
<0.001
|
|
logSurprisalText
|
1.81
|
1.38 – 2.24
|
<0.001
|
|
Random Effects
|
|
σ2
|
8839.40
|
|
τ00 pID
|
729.69
|
|
τ00 identifier
|
31.32
|
|
ICC
|
0.08
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.001 / 0.080
|
predict fixation fixDur from EEG features
m.eye <- lmer(fixDur ~ (1|identifier) + (1|pID) + n400_mean_CPz, data=df)
tab_model(m.eye)
|
|
fix Dur
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
210.22
|
204.41 – 216.02
|
<0.001
|
|
n400 mean CPz
|
-0.01
|
-0.01 – -0.00
|
0.003
|
|
Random Effects
|
|
σ2
|
8844.38
|
|
τ00 pID
|
729.05
|
|
τ00 identifier
|
32.34
|
|
ICC
|
0.08
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.079
|
predict EEG and surprisal from previous IA surprisal
mp.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText + prev_logSurprisalText, data=df)
tab_model(mp.m)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
16.72
|
9.04 – 24.40
|
<0.001
|
|
logFixDur
|
-2.71
|
-4.08 – -1.33
|
<0.001
|
|
logSurprisalText
|
-0.87
|
-1.34 – -0.40
|
<0.001
|
|
prev logSurprisalText
|
-0.52
|
-0.99 – -0.06
|
0.028
|
|
Random Effects
|
|
σ2
|
10336.85
|
|
τ00 pID
|
100.21
|
|
τ00 identifier
|
26.79
|
|
ICC
|
0.01
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
102838
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.013
|
mp.s <- lm( logSurprisalText ~ prev_logSurprisalText, data=df)
tab_model(mp.s)
|
|
log Surprisal Text
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.89
|
0.88 – 0.90
|
<0.001
|
|
prev logSurprisalText
|
0.15
|
0.14 – 0.15
|
<0.001
|
|
Observations
|
102838
|
|
R2 / R2 adjusted
|
0.022 / 0.022
|
model including bool for refixation
mr.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + refixation*logSurprisalText, data=df)
tab_model(mr.m)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
16.93
|
9.23 – 24.62
|
<0.001
|
|
logFixDur
|
-2.86
|
-4.22 – -1.49
|
<0.001
|
|
refixation [1]
|
0.54
|
-1.08 – 2.15
|
0.515
|
|
logSurprisalText
|
-0.49
|
-1.24 – 0.25
|
0.192
|
refixation [1] * logSurprisalText
|
-0.68
|
-1.63 – 0.27
|
0.160
|
|
Random Effects
|
|
σ2
|
10397.75
|
|
τ00 pID
|
99.06
|
|
τ00 identifier
|
26.38
|
|
ICC
|
0.01
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.012
|
Anova(mr.m)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: n400_mean_CPz
## Chisq Df Pr(>Chisq)
## logFixDur 16.8361 1 0.00004075 ***
## refixation 0.0206 1 0.8859451
## logSurprisalText 14.5204 1 0.0001387 ***
## refixation:logSurprisalText 1.9730 1 0.1601283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_model(mr.m,type = "emm", terms=c("logSurprisalText", "refixation"))

tb<-tableone::CreateTableOne(data = df, vars = c("n400_mean_CPz", "logFixDur"), strata = 'refixation', test=T)
knitr::kable(print(tb))
## Stratified by refixation
## 0 1 p test
## n 36498 67774
## n400_mean_CPz (mean (SD)) 1.15 (105.74) 0.98 (100.75) 0.795
## logFixDur (mean (SD)) 5.27 (0.44) 5.24 (0.49) <0.001
| n |
36498 |
67774 |
|
|
| n400_mean_CPz (mean (SD)) |
1.15 (105.74) |
0.98 (100.75) |
0.795 |
|
| logFixDur (mean (SD)) |
5.27 (0.44) |
5.24 (0.49) |
<0.001 |
|
predict EEG from surprisal and fixDur with random slope of surprisal for participant
m.rs.m <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText + logFixDur, data=df)
m.rs.min <- lmer(n400_max_magnitude_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logFixDur + logSurprisalText, data=df)
m.rs.max <- lmer(n400_min_magnitude_CPz ~(1+logSurprisalText|pID) + (1|identifier) + logFixDur + logSurprisalText, data=df)
tab_model(m.rs.m, m.rs.min, m.rs.max, show.ci=F)
|
|
n 400 mean C Pz
|
n 400 max magnitude C Pz
|
n 400 min magnitude C Pz
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
17.12
|
<0.001
|
95.94
|
<0.001
|
-64.66
|
<0.001
|
|
logSurprisalText
|
-0.93
|
0.003
|
-1.01
|
0.002
|
-0.74
|
0.019
|
|
logFixDur
|
-2.84
|
<0.001
|
-3.64
|
<0.001
|
-1.84
|
0.015
|
|
Random Effects
|
|
σ2
|
10391.53
|
10709.01
|
12203.99
|
|
τ00
|
112.30 pID
|
907.02 pID
|
706.83 pID
|
|
|
26.66 identifier
|
29.54 identifier
|
31.31 identifier
|
|
τ11
|
3.30 pID.logSurprisalText
|
3.99 pID.logSurprisalText
|
2.88 pID.logSurprisalText
|
|
ρ01
|
-0.43 pID
|
-0.04 pID
|
-0.05 pID
|
|
ICC
|
0.01
|
0.08
|
0.06
|
|
N
|
91 pID
|
91 pID
|
91 pID
|
|
|
50 identifier
|
50 identifier
|
50 identifier
|
|
Observations
|
104272
|
104272
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.013
|
0.000 / 0.081
|
0.000 / 0.057
|
dotplot(ranef(m.rs.m))
## $pID

##
## $identifier

Modeling with comprehension/MW
Load Behavioural data
df_comp <- read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EyeMindLink/Processed/Behaviour/EML1_page_level.csv')
df_comp <- df_comp %>% rename(pID=ParticipantID) %>%
mutate(Page=PageNum-1,
identifier=paste0(Text, Page))
df <- merge(df, df_comp, on=c("pID","identifier"))
df_mw <- df %>% drop_na(MW) %>% mutate(MW=as.factor(MW))
model FIXATION LEVEL N400 ~ surprisal*MW (caveat - MW is at page level)
m.mw <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText*MW , data=df_mw)
Anova(m.mw)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: n400_mean_CPz
## Chisq Df Pr(>Chisq)
## logSurprisalText 0.5167 1 0.4723
## MW 0.5501 1 0.4583
## logSurprisalText:MW 1.2574 1 0.2621
tab_model(m.mw)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
3.71
|
-1.92 – 9.34
|
0.196
|
|
logSurprisalText
|
0.06
|
-1.29 – 1.40
|
0.934
|
|
MW [1]
|
2.99
|
-1.81 – 7.79
|
0.222
|
|
logSurprisalText * MW [1]
|
-1.22
|
-3.36 – 0.91
|
0.262
|
|
Random Effects
|
|
σ2
|
9889.06
|
|
τ00 pID
|
333.44
|
|
τ00 identifier
|
55.66
|
|
τ11 pID.logSurprisalText
|
2.97
|
|
ρ01 pID
|
-0.85
|
|
ICC
|
0.03
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.033
|
plot_model(m.mw,type = "emm", terms=c("logSurprisalText", "MW"))

plot_model(m.mw,type = "emm", terms=c("logFixDur", "MW"))
## `logFixDur` was not found in model terms. Maybe misspelled?
## Can't compute estimated marginal means, 'emmeans::emmeans()' returned an error.
##
## Reason: No variable named logFixDur in the reference grid
## You may try 'ggpredict()' or 'ggeffect()'.
## NULL
emmeans(m.mw, pairwise ~ MW, p.adjust = "fdr")
## $emmeans
## MW emmean SE df asymp.LCL asymp.UCL
## 0 3.77 2.68 Inf -1.476 9.01
## 1 5.47 2.90 Inf -0.223 11.16
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## 0 - 1 -1.7 2.08 Inf -0.814 0.4155
##
## Degrees-of-freedom method: asymptotic
bobby’s mode: MW label at page level, surprosal and n400 at fixation level
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB <- lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n400_mean_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 268460.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8943 -0.5463 0.0153 0.5626 5.7741
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1654.44 40.675
## logSurprisalText 18.15 4.260 -1.00
## pID (Intercept) 579.29 24.068
## MW1 11698.49 108.160 0.02
## logSurprisalText 20.62 4.541 -0.19 -0.86
## MW1:logSurprisalText 389.91 19.746 -0.26 -0.95 0.76
## Residual 9416.73 97.040
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.7955 4.3513 64.0041 1.102 0.275
## MW1 4.6060 15.1917 461.7926 0.303 0.762
## logSurprisalText 0.2108 0.9029 44.2160 0.234 0.816
## MW1:logSurprisalText -1.8703 3.0574 18.9739 -0.612 0.548
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.209
## lgSrprslTxt -0.455 -0.144
## MW1:lgSrprT 0.046 -0.909 -0.021
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
tab_model(mB)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
4.80
|
-3.73 – 13.32
|
0.270
|
|
MW [1]
|
4.61
|
-25.17 – 34.38
|
0.762
|
|
logSurprisalText
|
0.21
|
-1.56 – 1.98
|
0.815
|
|
MW [1] * logSurprisalText
|
-1.87
|
-7.86 – 4.12
|
0.541
|
|
Random Effects
|
|
σ2
|
9416.73
|
|
τ00 pID:identifier
|
1654.44
|
|
τ00 pID
|
579.29
|
|
τ11 pID:identifier.logSurprisalText
|
18.15
|
|
τ11 pID.MW1
|
11698.49
|
|
τ11 pID.logSurprisalText
|
20.62
|
|
τ11 pID.MW1:logSurprisalText
|
389.91
|
|
ρ01 pID:identifier
|
-1.00
|
|
ρ01 pID.MW1
|
0.02
|
|
ρ01 pID.logSurprisalText
|
-0.19
|
|
ρ01 pID.MW1:logSurprisalText
|
-0.26
|
|
ICC
|
0.34
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.338
|
min n400
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.min <- lmer(n400_min_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.min)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## n400_min_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 272324.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.8703 -0.5359 0.0344 0.5788 5.1761
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1897.2779 43.558
## logSurprisalText 14.9662 3.869 -0.74
## pID (Intercept) 1288.5762 35.897
## MW1 104.7103 10.233 -0.55
## logSurprisalText 0.5579 0.747 -0.64 0.84
## MW1:logSurprisalText 35036.4903 187.180 0.38 -0.49 -0.44
## Residual 11069.8068 105.213
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -75.105 5.380 98.438 -13.959 <0.0000000000000002
## MW1 8.644 6.838 125.248 1.264 0.209
## logSurprisalText 0.130 0.782 138.419 0.166 0.868
## MW1:logSurprisalText 2.405 24.482 4951.092 0.098 0.922
##
## (Intercept) ***
## MW1
## logSurprisalText
## MW1:logSurprisalText
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.513
## lgSrprslTxt -0.354 0.253
## MW1:lgSrprT 0.196 -0.050 -0.063
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.min)
|
|
n 400 min magnitude C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-75.10
|
-85.65 – -64.56
|
<0.001
|
|
MW [1]
|
8.64
|
-4.76 – 22.05
|
0.206
|
|
logSurprisalText
|
0.13
|
-1.40 – 1.66
|
0.868
|
|
MW [1] * logSurprisalText
|
2.41
|
-45.58 – 50.39
|
0.922
|
|
Random Effects
|
|
σ2
|
11069.81
|
|
τ00 pID:identifier
|
1897.28
|
|
τ00 pID
|
1288.58
|
|
τ11 pID:identifier.logSurprisalText
|
14.97
|
|
τ11 pID.MW1
|
104.71
|
|
τ11 pID.logSurprisalText
|
0.56
|
|
τ11 pID.MW1:logSurprisalText
|
35036.49
|
|
ρ01 pID:identifier
|
-0.74
|
|
ρ01 pID.MW1
|
-0.55
|
|
ρ01 pID.logSurprisalText
|
-0.64
|
|
ρ01 pID.MW1:logSurprisalText
|
0.38
|
|
ICC
|
0.79
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.001 / 0.790
|
max n400
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.max <- lmer(n400_max_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.max)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## n400_max_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 269299.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.6498 -0.5492 0.0005 0.5668 5.7653
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1720.701 41.481
## logSurprisalText 0.996 0.998 -1.00
## pID (Intercept) 1540.422 39.248
## MW1 325.368 18.038 -1.00
## logSurprisalText 11.519 3.394 0.36 -0.38
## MW1:logSurprisalText 694.098 26.346 0.40 -0.39 -0.21
## Residual 9745.094 98.717
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 79.8938 5.5434 49.4935 14.412 <0.0000000000000002
## MW1 4.2239 6.4477 153.9681 0.655 0.513
## logSurprisalText 0.3479 0.7790 67.8464 0.447 0.657
## MW1:logSurprisalText -0.5603 3.6415 365.7632 -0.154 0.878
##
## (Intercept) ***
## MW1
## logSurprisalText
## MW1:logSurprisalText
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.641
## lgSrprslTxt -0.031 0.069
## MW1:lgSrprT 0.228 -0.119 -0.226
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.max)
|
|
n 400 max magnitude C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
79.89
|
69.03 – 90.76
|
<0.001
|
|
MW [1]
|
4.22
|
-8.41 – 16.86
|
0.512
|
|
logSurprisalText
|
0.35
|
-1.18 – 1.87
|
0.655
|
|
MW [1] * logSurprisalText
|
-0.56
|
-7.70 – 6.58
|
0.878
|
|
Random Effects
|
|
σ2
|
9745.09
|
|
τ00 pID:identifier
|
1720.70
|
|
τ00 pID
|
1540.42
|
|
τ11 pID:identifier.logSurprisalText
|
1.00
|
|
τ11 pID.MW1
|
325.37
|
|
τ11 pID.logSurprisalText
|
11.52
|
|
τ11 pID.MW1:logSurprisalText
|
694.10
|
|
ρ01 pID:identifier
|
-1.00
|
|
ρ01 pID.MW1
|
-1.00
|
|
ρ01 pID.logSurprisalText
|
0.36
|
|
ρ01 pID.MW1:logSurprisalText
|
0.40
|
|
ICC
|
0.28
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.279
|
Eye-mind-linkage metric at page level
note atanh transform for correlation when it is to be used in model
eml_page <- df %>% group_by(pID, identifier) %>% summarise(
count=n(),
cor_n400meanCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_mean_CPz)),
cor_n400minCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_min_magnitude_CPz)),
cor_n400maxCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_max_magnitude_CPz)),
cor_logFixDur_logSurprisalText = atanh(cor(logSurprisalText, logFixDur)),
meanLogSurprisalText=mean(logSurprisalText)
) %>% drop_na() %>%
filter(count>2) # remove instances with single or two fixation on page as correlation coeff will be 1 or -1
eml_page[Reduce(`&`, lapply(eml_page, is.finite)),]
## # A tibble: 0 × 8
## # Groups: pID [0]
## # … with 8 variables: pID <chr>, identifier <fct>, count <int>,
## # cor_n400meanCPz_logSurprisalText <dbl>,
## # cor_n400minCPz_logSurprisalText <dbl>,
## # cor_n400maxCPz_logSurprisalText <dbl>,
## # cor_logFixDur_logSurprisalText <dbl>, meanLogSurprisalText <dbl>
df_page <- left_join(eml_page, df_comp, on=c('pID', 'identifer'))
df_page$MW = as.factor(df_page$MW)
# plot the new metrics
p1<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400meanCPz_logSurprisalText, color=MW))
p2<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_logFixDur_logSurprisalText, color=MW))
p3<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400minCPz_logSurprisalText, color=MW))
p4<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400maxCPz_logSurprisalText, color=MW))
grid.arrange(p1,p2,p3,p4)

# plot by participantID
ggplot(df_page, aes(x=cor_n400meanCPz_logSurprisalText, y=pID)) +
geom_boxplot( horizontal = TRUE, outlier.colour="red",
outlier.fill="red",
outlier.size=0.2)

binomial regression: predict page level MW from page level correlations with surprisal
m.mw.mean <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + (1|identifier) + (1+cor_n400meanCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.mean,type = "emm", terms=c("cor_n400meanCPz_logSurprisalText"))

m.mw.min <- glmer(MW ~ cor_n400minCPz_logSurprisalText + (1|identifier) + (1+cor_n400minCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.min,type = "emm", terms=c("cor_n400minCPz_logSurprisalText"))

m.mw.max <- glmer(MW ~ cor_n400maxCPz_logSurprisalText + (1|identifier) + (1+cor_n400maxCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.max,type = "emm", terms=c("cor_n400maxCPz_logSurprisalText"))

m.mw.all <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + cor_n400minCPz_logSurprisalText +cor_n400maxCPz_logSurprisalText + (1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.mean, m.mw.min, m.mw.max)
|
|
MW
|
MW
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.27 – 0.83
|
0.009
|
0.47
|
0.27 – 0.83
|
0.009
|
0.48
|
0.47 – 0.48
|
<0.001
|
cor n400meanCPz logSurprisalText
|
0.31
|
0.08 – 1.26
|
0.101
|
|
|
|
|
|
|
cor n400minCPz logSurprisalText
|
|
|
|
0.26
|
0.05 – 1.24
|
0.090
|
|
|
|
cor n400maxCPz logSurprisalText
|
|
|
|
|
|
|
0.56
|
0.55 – 0.56
|
<0.001
|
|
Random Effects
|
|
σ2
|
3.29
|
3.29
|
3.29
|
|
τ00
|
1.28 pID
|
1.27 pID
|
1.25 pID
|
|
|
0.70 identifier
|
0.70 identifier
|
0.68 identifier
|
|
τ11
|
0.02 pID.cor_n400meanCPz_logSurprisalText
|
0.02 pID.cor_n400minCPz_logSurprisalText
|
0.26 pID.cor_n400maxCPz_logSurprisalText
|
|
ρ01
|
-1.00 pID
|
-1.00 pID
|
-1.00 pID
|
|
ICC
|
0.38
|
0.38
|
0.37
|
|
N
|
20 identifier
|
20 identifier
|
20 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
287
|
287
|
287
|
|
Marginal R2 / Conditional R2
|
0.039 / 0.400
|
0.028 / 0.393
|
0.009 / 0.380
|
tab_model(m.mw.mean)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.27 – 0.83
|
0.009
|
cor n400meanCPz logSurprisalText
|
0.31
|
0.08 – 1.26
|
0.101
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.28
|
|
τ00 identifier
|
0.70
|
|
τ11 pID.cor_n400meanCPz_logSurprisalText
|
0.02
|
|
ρ01 pID
|
-1.00
|
|
ICC
|
0.38
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.039 / 0.400
|
tab_model(m.mw.all)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.27 – 0.83
|
0.009
|
cor n400meanCPz logSurprisalText
|
0.19
|
0.00 – 24.74
|
0.503
|
cor n400minCPz logSurprisalText
|
0.94
|
0.01 – 76.33
|
0.978
|
cor n400maxCPz logSurprisalText
|
1.78
|
0.20 – 15.68
|
0.606
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.31
|
|
τ00 identifier
|
0.71
|
|
ICC
|
0.38
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.046 / 0.410
|
emmeans(m.mw.mean, pairwise ~ cor_n400meanCPz_logSurprisalText, p.adjust = "fdr")
## $emmeans
## cor_n400meanCPz_logSurprisalText emmean SE df asymp.LCL asymp.UCL
## 0.000208 -0.753 0.287 Inf -1.32 -0.189
##
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## (nothing) nonEst NA NA NA NA
##
## Note: contrasts are still on the logit scale
Does page-average surprisal predict MW
m.mw.sp <- glmer(MW ~ meanLogSurprisalText + (1+meanLogSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.sp)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.25
|
0.08 – 0.81
|
0.020
|
|
meanLogSurprisalText
|
1.96
|
0.61 – 6.24
|
0.256
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.91
|
|
τ11 pID.meanLogSurprisalText
|
4.98
|
|
ρ01 pID
|
-0.93
|
|
ICC
|
0.36
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.010 / 0.367
|